In [74]:
%matplotlib inline
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
import glob
import numpy as np
import geopandas as gpd
from shapely.geometry import Point
from geopandas import GeoDataFrame
sns.set(style='whitegrid')

Import facility data and NERC labels


In [2]:
path = os.path.join('Data storage', 'Facility gen fuels and CO2 2017-05-25.zip')
facility_df = pd.read_csv(path, parse_dates=['datetime'])

In [3]:
facility_df.head()


Out[3]:
f fuel month plant id total fuel (mmbtu) year generation (MWh) elec fuel (mmbtu) geography last_updated lat lon prime mover datetime quarter all fuel fossil CO2 (kg) elec fuel fossil CO2 (kg) all fuel total CO2 (kg) elec fuel total CO2 (kg)
0 M NG 3 10275 0.0 2017 0.0 0.0 USA-FL 2017-05-24T14:26:30-04:00 27.9114 -81.6006 ALL 2017-03-01 1 0.0 0.0 0.0 0.0
1 M NG 2 10275 0.0 2017 0.0 0.0 USA-FL 2017-05-24T14:26:30-04:00 27.9114 -81.6006 ALL 2017-02-01 1 0.0 0.0 0.0 0.0
2 M NG 1 10275 0.0 2017 0.0 0.0 USA-FL 2017-05-24T14:26:30-04:00 27.9114 -81.6006 ALL 2017-01-01 1 0.0 0.0 0.0 0.0
3 M NG 12 10275 0.0 2016 0.0 0.0 USA-FL 2017-05-24T14:26:30-04:00 27.9114 -81.6006 ALL 2016-12-01 4 0.0 0.0 0.0 0.0
4 M NG 11 10275 0.0 2016 0.0 0.0 USA-FL 2017-05-24T14:26:30-04:00 27.9114 -81.6006 ALL 2016-11-01 4 0.0 0.0 0.0 0.0

In [4]:
facility_df.dropna(inplace=True, subset=['lat', 'lon'])

In [5]:
cols = ['lat', 'lon', 'plant id', 'year']
small_facility = facility_df.loc[:, cols].drop_duplicates()

In [6]:
geometry = [Point(xy) for xy in zip(small_facility.lon, small_facility.lat)]
# small_facility = small_facility.drop(['lon', 'lat'], axis=1)
crs = {'init': 'epsg:4326'}
geo_df = GeoDataFrame(small_facility, crs=crs, geometry=geometry)

Read NERC shapefile and merge with geo_df


In [7]:
path = os.path.join('Data storage', 'NERC_Regions_EIA', 'NercRegions_201610.shp')
regions = gpd.read_file(path)

In [8]:
facility_nerc = gpd.sjoin(geo_df, regions, how='inner', op='within')

In [9]:
facility_nerc.head()


Out[9]:
lat lon plant id year geometry index_right NERC NERC_Label
0 27.9114 -81.6006 10275 2017 POINT (-81.6006 27.9114) 1 FRCC Florida Reliability Coordinating Council (FRCC)
3 27.9114 -81.6006 10275 2016 POINT (-81.6006 27.9114) 1 FRCC Florida Reliability Coordinating Council (FRCC)
15 27.9114 -81.6006 10275 2015 POINT (-81.6006 27.9114) 1 FRCC Florida Reliability Coordinating Council (FRCC)
27 27.9114 -81.6006 10275 2014 POINT (-81.6006 27.9114) 1 FRCC Florida Reliability Coordinating Council (FRCC)
39 27.9114 -81.6006 10275 2013 POINT (-81.6006 27.9114) 1 FRCC Florida Reliability Coordinating Council (FRCC)

Merge NERC labels into the facility df


In [10]:
cols = ['plant id', 'year', 'NERC']
facility_df = facility_df.merge(facility_nerc.loc[:, cols],
                                on=['plant id', 'year'], how='left')

In [11]:
facility_df.head()


Out[11]:
f fuel month plant id total fuel (mmbtu) year generation (MWh) elec fuel (mmbtu) geography last_updated lat lon prime mover datetime quarter all fuel fossil CO2 (kg) elec fuel fossil CO2 (kg) all fuel total CO2 (kg) elec fuel total CO2 (kg) NERC
0 M NG 3 10275 0.0 2017 0.0 0.0 USA-FL 2017-05-24T14:26:30-04:00 27.9114 -81.6006 ALL 2017-03-01 1 0.0 0.0 0.0 0.0 FRCC
1 M NG 2 10275 0.0 2017 0.0 0.0 USA-FL 2017-05-24T14:26:30-04:00 27.9114 -81.6006 ALL 2017-02-01 1 0.0 0.0 0.0 0.0 FRCC
2 M NG 1 10275 0.0 2017 0.0 0.0 USA-FL 2017-05-24T14:26:30-04:00 27.9114 -81.6006 ALL 2017-01-01 1 0.0 0.0 0.0 0.0 FRCC
3 M NG 12 10275 0.0 2016 0.0 0.0 USA-FL 2017-05-24T14:26:30-04:00 27.9114 -81.6006 ALL 2016-12-01 4 0.0 0.0 0.0 0.0 FRCC
4 M NG 11 10275 0.0 2016 0.0 0.0 USA-FL 2017-05-24T14:26:30-04:00 27.9114 -81.6006 ALL 2016-11-01 4 0.0 0.0 0.0 0.0 FRCC

Filter out data older than 2014 to reduce size


In [12]:
facility_df['state'] = facility_df['geography'].str[-2:]
keep_cols = ['fuel', 'year', 'month', 'datetime', 'state', 'plant id', 'NERC',
             'generation (MWh)', 'total fuel (mmbtu)', 'elec fuel (mmbtu)']
facility_df = facility_df.loc[facility_df['year'] >= 2014, keep_cols]

In [13]:
facility_fuel_cats = {'COW': ['SUB', 'BIT', 'LIG', 'WC', 'SC', 'RC', 'SGC'],
                      'NG': ['NG'],
                      'PEL': ['DFO', 'RFO', 'KER', 'JF',
                              'PG', 'WO', 'SGP'],
                      'PC': ['PC'],
                      'HYC': ['WAT'],
                      'HPS': [],
                      'GEO': ['GEO'],
                      'NUC': ['NUC'],
                      'OOG': ['BFG', 'OG', 'LFG'],
                      'OTH': ['OTH', 'MSN', 'MSW', 'PUR', 'TDF', 'WH'],
                      'SUN': ['SUN'],
                      'DPV': [],
                      'WAS': ['OBL', 'OBS', 'OBG', 'MSB', 'SLW'],
                      'WND': ['WND'],
                      'WWW': ['WDL', 'WDS', 'AB', 'BLQ']
                      }

In [14]:
for category in facility_fuel_cats.keys():
    fuels = facility_fuel_cats[category]
    facility_df.loc[facility_df['fuel'].isin(fuels),
                    'fuel category'] = category

In [15]:
facility_df.head()


Out[15]:
fuel year month datetime state plant id NERC generation (MWh) total fuel (mmbtu) elec fuel (mmbtu) fuel category
0 NG 2017 3 2017-03-01 FL 10275 FRCC 0.0 0.0 0.0 NG
1 NG 2017 2 2017-02-01 FL 10275 FRCC 0.0 0.0 0.0 NG
2 NG 2017 1 2017-01-01 FL 10275 FRCC 0.0 0.0 0.0 NG
3 NG 2016 12 2016-12-01 FL 10275 FRCC 0.0 0.0 0.0 NG
4 NG 2016 11 2016-11-01 FL 10275 FRCC 0.0 0.0 0.0 NG

In [16]:
facility_df.dtypes


Out[16]:
fuel                          object
year                           int64
month                          int64
datetime              datetime64[ns]
state                         object
plant id                       int64
NERC                          object
generation (MWh)             float64
total fuel (mmbtu)           float64
elec fuel (mmbtu)            float64
fuel category                 object
dtype: object

In [17]:
facility_df.loc[facility_df['NERC'].isnull(), 'state'].unique()


Out[17]:
array(['HI', 'FL', 'VA', 'MI', 'ME', 'MN', 'CA', 'AK', 'NY', 'MD', 'WI',
       'NH', 'PA', 'OR', 'MA', 'IL', 'DC', 'RI', 'TX', 'CT', 'WA'], dtype=object)

Import state-level generation data


In [26]:
folder = os.path.join('Data storage', 'Derived data', 'state gen data')
states = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", 
          "FL", "GA", "HI", "ID", "IL", "IN", "IA", "KS", 
          "KY", "LA", "ME", "MD", "MA", "MI", "MN", "MS", 
          "MO", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", 
          "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", 
          "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]

In [29]:
state_list = []
for state in states:
    path = os.path.join(folder, '{} fuels gen.csv'.format(state))
    df = pd.read_csv(path, parse_dates=['datetime'])
    state_list.append(df)
state_df = pd.concat(state_list)
state_df.reset_index(inplace=True, drop=True)

In [30]:
state_df.dtypes


Out[30]:
end                            int64
f                             object
geography                     object
last_updated                  object
sector                         int64
series_id                     object
start                          int64
type                          object
units                         object
year                           int64
month                          int64
generation (MWh)             float64
datetime              datetime64[ns]
quarter                        int64
total fuel (mmbtu)           float64
elec fuel (mmbtu)            float64
all fuel CO2 (kg)            float64
elec fuel CO2 (kg)           float64
dtype: object

In [31]:
state_df['state'] = state_df['geography'].str[-2:]
keep_cols = ['state', 'type', 'year', 'datetime', 'generation (MWh)',
             'elec fuel (mmbtu)']

fuel_cats = facility_fuel_cats.keys()
state_df = state_df.loc[(state_df['year'] >= 2014) &
                        (state_df['type'].isin(fuel_cats)), keep_cols]

In [32]:
state_df['type'].unique()


Out[32]:
array(['COW', 'HYC', 'NUC', 'NG', 'PEL', 'DPV', 'OTH', 'OOG', 'WWW', 'SUN',
       'WAS', 'WND', 'HPS', 'PC', 'GEO'], dtype=object)

Total generation and fuel consumption for each fuel category

Annual


In [33]:
annual_facility = facility_df.groupby(['year', 'state', 'fuel category']).sum()
# annual_facility.reset_index(inplace=True)
annual_facility.drop('plant id', axis=1, inplace=True)

In [34]:
annual_facility.head()


Out[34]:
generation (MWh) elec fuel (mmbtu)
year state fuel category
2014 AK COW 558292.181 7216953.0
HYC 1538738.000 14633403.0
NG 3288022.319 32828304.0
OOG 56165.769 546450.0
PEL 445621.447 6927101.0

In [35]:
annual_state = state_df.groupby(['year', 'state', 'type']).sum()
# annual_state.reset_index(inplace=True)

In [36]:
annual_state.head(n=25)


Out[36]:
generation (MWh) elec fuel (mmbtu)
year state type
2014 AK COW 558292.17 7216950.0
HYC 1538738.00 NaN
NG 3288022.33 32828310.0
OOG NaN NaN
OTH -2312.99 NaN
PEL 445621.46 6927090.0
WAS 62511.68 NaN
WND 151957.00 NaN
WWW 0.00 NaN
AL COW 47301626.28 488993810.0
DPV 3101.38 NaN
HYC 9466872.01 NaN
NG 48270074.40 362215370.0
NUC 41243689.00 NaN
OOG 180403.48 NaN
OTH 140.51 NaN
PEL 98100.01 1199180.0
WAS 46936.84 NaN
WWW 2732084.23 NaN
AR COW 33220754.79 334098580.0
DPV 4853.48 NaN
HPS 67070.00 NaN
HYC 2639776.01 NaN
NG 9613708.03 70429870.0
NUC 14478259.00 NaN

It's interesting that the facility data has fuel consumption for solar generation and the state data doesn't. Looking at a 923 data file, it's clear that the fuel consumption is just based on a conversion efficiency of 36.6% across all facilities.


In [37]:
annual_state.loc[2016, 'CA', 'SUN']


Out[37]:
generation (MWh)     19030396.62
elec fuel (mmbtu)            NaN
Name: (2016, CA, SUN), dtype: float64

In [38]:
annual_facility.loc[2016, 'CA', 'SUN']


Out[38]:
generation (MWh)      14354970.0
elec fuel (mmbtu)    133773953.0
Name: (2016, CA, SUN), dtype: float64

How much generation from large sources (Hydro, wind, coal, natural gas, and nuclear) is missed by monthly 923 data?


In [40]:
for fuel in ['HYC', 'WND', 'COW', 'NG', 'NUC', 'SUN']:
    state_total = annual_state.loc[2016, :, fuel]['generation (MWh)'].sum()
    facility_total = annual_facility.loc[2016, :, fuel]['generation (MWh)'].sum()
    
    error = (state_total - facility_total) / state_total
    print('{} has an error of {:.2f}%'.format(fuel, error * 100))


HYC has an error of 24.60%
WND has an error of 3.35%
COW has an error of 1.15%
NG has an error of 5.21%
NUC has an error of 0.00%
SUN has an error of 37.42%

2015 generation and fuel consumption from annual vs monthly reporting plants

The goal here is to figure out how much of generation and fuel consumption from facilities that only report annually is in each NERC region (by state)


In [18]:
path = os.path.join('Data storage', 'EIA923_Schedules_2_3_4_5_M_12_2015_Final.xlsx')
frequency = pd.read_excel(path, sheetname='Page 6 Plant Frame', header=4)

In [19]:
frequency.head()


Out[19]:
YEAR Plant Id Plant State Sector Number NAICS Code Plant Name Combined Heat And Power Status Reporting Frequency
0 2015 2 AL 1 22 Bankhead Dam N A
1 2015 3 AL 1 22 Barry N M
2 2015 4 AL 1 22 Walter Bouldin Dam N M
3 2015 7 AL 1 22 Gadsden Y A
4 2015 8 AL 1 22 Gorgas N M

In [20]:
frequency.rename(columns={'Plant Id': 'plant id',
                          'Plant State': 'state',
                          'YEAR': 'year',
                          'Reporting\nFrequency': 'Reporting Frequency'}, inplace=True)

In [99]:
frequency.head()


Out[99]:
year plant id state Sector Number NAICS Code Plant Name Combined Heat And Power Status Reporting Frequency
0 2015 2 AL 1 22 Bankhead Dam N A
1 2015 3 AL 1 22 Barry N M
2 2015 4 AL 1 22 Walter Bouldin Dam N M
3 2015 7 AL 1 22 Gadsden Y A
4 2015 8 AL 1 22 Gorgas N M

In [21]:
frequency.dtypes


Out[21]:
year                                int64
plant id                            int64
state                              object
Sector Number                       int64
NAICS Code                          int64
Plant Name                         object
Combined Heat And\nPower Status    object
Reporting Frequency                object
dtype: object

Make a dataframe with generation, fuel consumption, and reporting frequency of facilities in 2015


In [22]:
freq_cols = ['year', 'plant id', 'Reporting Frequency']
df = pd.merge(facility_df, frequency.loc[:, freq_cols], on=['year', 'plant id'])

In [80]:
df.head()


Out[80]:
fuel year month datetime state plant id NERC generation (MWh) total fuel (mmbtu) elec fuel (mmbtu) fuel category Reporting Frequency
0 NG 2015 12 2015-12-01 FL 10275 FRCC 4344.509 55210.0 22133.0 NG A
1 NG 2015 11 2015-11-01 FL 10275 FRCC 4304.052 54695.0 21927.0 NG A
2 NG 2015 10 2015-10-01 FL 10275 FRCC 4810.546 61133.0 24507.0 NG A
3 NG 2015 9 2015-09-01 FL 10275 FRCC 5058.453 64282.0 25770.0 NG A
4 NG 2015 8 2015-08-01 FL 10275 FRCC 5404.571 68680.0 27533.0 NG A

In [78]:
g = sns.factorplot(x='fuel category', y='generation (MWh)', hue='Reporting Frequency',
                   col='NERC', col_wrap=3, data=df, estimator=np.sum, ci=0, kind='bar',
                   palette='tab10')

g.set_xticklabels(rotation=30)


Out[78]:
<seaborn.axisgrid.FacetGrid at 0x16fb2e198>

Number of NERC regions in a state


In [24]:
df.loc[df['state'] == 'TX', 'NERC'].nunique()


Out[24]:
3

Fraction of generation/consumption from Annual reporting facilities in each NERC region of a state

This is development of a method that will be used to approximate the fraction of EIA-estimated generation and consumption within each state that gets apportioned to each NERC regions (when there is more than one). The idea is to take data from the most recent "final" EIA-923 and use the annual reporting facilities to approximate the divisions for more recent data. I still need to figure out if it's better to do the calculation by month within a year or just for the year as a whole.

Determining if it's better to do month-by-month vs a single value for the whole year will depend on if the share of generation/consumption from Annual reporting facilities in each NERC changes much over the course of the year. There is the potential for error either way, and maybe even differences by state. Annual is certainly simpler.

While looking at data for Texas, I've discovered that generation from Annual reporting facilities can be negative. Need to figure out how (if?) to deal with this...

Conclusion

While there can be variation of % generation in each NERC within a state over the course of 2015, most fuel categories across most states are quite stable. And when fuels do a have a wide spread over the year, they also tend to not be a large fraction of total generation within the NERC region. Given these observations, I'm going to stick with a split calculated as the average over an entire year.


In [81]:
def annual(df, state):
    """Return the percent of gen & consumption by fuel type in each NERC region
    for a state"""
    a = df.loc[(df.state == state) & 
                     (df['Reporting Frequency'] == 'A')].copy()
    a.drop(['plant id', 'year'], axis=1, inplace=True)
    a = a.groupby(['NERC', 'fuel category']).sum()
    
    fuels = set(a.index.get_level_values('fuel category'))
    
    temp_list = []
    for fuel in fuels:
        temp = (a.xs(fuel, level='fuel category')
                / a.xs(fuel, level='fuel category').sum())
        temp['fuel category'] = fuel
        temp_list.append(temp)
    
    result = pd.concat(temp_list)
    result.reset_index(inplace=True)
    result['state'] = state
    
    rename_cols = {'generation (MWh)': '% generation',
                   'total fuel (mmbtu)': '% total fuel',
                   'elec fuel (mmbtu)': '% elec fuel'}
    
    result.rename(columns=rename_cols, inplace=True)
    
    return result

In [30]:
def annual_month(df, state):
    """Return the percent of gen & consumption by fuel type and month in each 
    NERC region for a state"""
    a = df.loc[(df.state == state) & 
                     (df['Reporting Frequency'] == 'A')].copy()
    a.drop(['plant id', 'year'], axis=1, inplace=True)
    a = a.groupby(['NERC', 'fuel category', 'month']).sum()
    
    fuels = set(a.index.get_level_values('fuel category'))
    
    temp_list = []
    for fuel in fuels:
        for month in range(1, 13):
            temp = (a.xs(fuel, level='fuel category')
                     .xs(month, level='month')
                    / a.xs(fuel, level='fuel category')
                       .xs(month, level='month')
                       .sum())
            temp['fuel category'] = fuel
            temp['month'] = month
            temp_list.append(temp)
    
    result = pd.concat(temp_list)
    result.reset_index(inplace=True)
    result['state'] = state
    
    rename_cols = {'generation (MWh)': '% generation',
                   'total fuel (mmbtu)': '% total fuel',
                   'elec fuel (mmbtu)': '% elec fuel'}
    
    result.rename(columns=rename_cols, inplace=True)
    
    return result

This is the percent of generation, total fuel consumption, and electric fuel consumption from facilities that report annually to EIA-923


In [82]:
df_list = []
for state in states:
    num_nerc = df.loc[df.state == state, 'NERC'].nunique()
    if num_nerc > 1:
        df_list.append(annual(df, state))

In [31]:
df_list = []
for state in states:
    num_nerc = df.loc[df.state == state, 'NERC'].nunique()
    if num_nerc > 1:
        df_list.append(annual_month(df, state))

In [32]:
fuel_by_nerc_month = pd.concat(df_list).reset_index(drop=True)

In [83]:
fuel_by_nerc = pd.concat(df_list).reset_index(drop=True)

In [84]:
fuel_by_nerc.head()


Out[84]:
NERC month % generation % total fuel % elec fuel fuel category state
0 - 0.333333 0.000066 0.034382 0.000065 WWW AR
1 SERC 0.666667 0.999934 0.965618 0.999935 WWW AR
2 SERC 1.000000 NaN NaN NaN PC AR
3 - 0.276596 0.002770 0.001223 0.002772 NG AR
4 SERC 0.446809 0.219980 0.636033 0.175285 NG AR

In [214]:
fuel_by_nerc_month.tail()


Out[214]:
NERC % generation % total fuel % elec fuel fuel category month state
2704 RFC 0.249526 0.382119 0.513470 NG 10 WI
2705 MRO 0.946647 0.666793 0.665857 NG 11 WI
2706 RFC 0.053353 0.333207 0.334143 NG 11 WI
2707 MRO 0.923901 0.658481 0.718664 NG 12 WI
2708 RFC 0.076099 0.341519 0.281336 NG 12 WI

In [ ]:
st

In [27]:
split_states = []
for state in states:
    if df.loc[df.state == state, 'NERC'].nunique() > 1:
        split_states.append(state)

In [28]:
split_states


Out[28]:
['AR',
 'FL',
 'IL',
 'IA',
 'KS',
 'KY',
 'LA',
 'MI',
 'MO',
 'NE',
 'NM',
 'NC',
 'OK',
 'SD',
 'TX',
 'VA',
 'WI']

In [56]:
cols = ['state', 'NERC', 'fuel category']
a = fuel_by_nerc_month.groupby(cols).std()
a.drop('month', axis=1, inplace=True)

In [63]:
a.xs('AR', level='state')


Out[63]:
% generation % total fuel % elec fuel
NERC fuel category
- HYC 4.928033e-09 4.608873e-07 4.608873e-07
NG 3.047858e-04 4.821993e-04 2.272056e-04
PEL 0.000000e+00 0.000000e+00 0.000000e+00
SUN NaN NaN NaN
WWW 2.242653e-05 6.797735e-03 1.781255e-05
SERC HYC 6.275202e-09 8.077144e-07 8.077144e-07
NG 8.582103e-02 1.433441e-01 6.745018e-02
OOG 3.920610e-08 4.330886e-06 4.330886e-06
PC NaN NaN NaN
PEL 0.000000e+00 0.000000e+00 0.000000e+00
WAS 0.000000e+00 0.000000e+00 0.000000e+00
WWW 2.242653e-05 6.797735e-03 1.781255e-05
SPP HYC 5.370029e-09 6.464797e-07 6.464797e-07
NG 8.551625e-02 1.428619e-01 6.722309e-02
OOG 3.920610e-08 4.330886e-06 4.330886e-06

In [70]:
a[a > .1].dropna(how='all')


Out[70]:
% generation % total fuel % elec fuel
state NERC fuel category
AR SERC NG NaN 0.143344 NaN
SPP NG NaN 0.142862 NaN
FL FRCC COW 0.102601 NaN 0.134697
SERC COW 0.102601 NaN 0.134697
IA - NG 0.303138 0.264098 0.300394
MRO COW 0.132677 NaN 0.140241
NG 0.211916 0.213591 0.214703
SERC COW 0.153661 NaN 0.152410
NG 0.121395 NaN 0.113601
MI MRO PEL 0.579756 NaN NaN
RFC PEL 0.579756 NaN NaN
TX SPP WND 0.214635 0.214635 0.214635
WWW 5.272077 NaN 0.124454
TRE WND 0.214635 0.214635 0.214635
WWW 5.272077 NaN 0.124454
VA RFC COW 0.169581 NaN 0.185219
NG NaN NaN 0.116589
PEL NaN 0.115835 NaN
SERC COW 0.169581 NaN 0.185219
NG NaN NaN 0.116589
PEL NaN 0.115835 NaN
WI MRO NG 0.159994 NaN 0.143071
PEL 0.187468 NaN NaN
RFC NG 0.159994 NaN 0.143071
PEL 0.187468 NaN NaN

In [75]:
fuels = ['NG', 'HYC', 'COW', 'GEO', 'WND', 'SUN']

sns.factorplot(x='month', y='% generation', hue='fuel category', col='NERC',
               row='state',
               data=fuel_by_nerc_month.loc[(fuel_by_nerc_month['fuel category'].isin(fuels)) &
                                           (fuel_by_nerc_month['NERC'] != '-')],
               n_boot=1)
path = os.path.join('Figures', 'SI', 'Annual facility seasonal gen variation.pdf')
# plt.savefig(path, bbox_inches='tight')



In [222]:
fuel_by_nerc_month.loc[(fuel_by_nerc_month.state=='TX') &
                       (fuel_by_nerc_month['fuel category'] == 'WWW')]


Out[222]:
NERC % generation % total fuel % elec fuel fuel category month state
2265 SPP 4.484362 0.538867 0.562231 WWW 1 TX
2266 TRE -3.484362 0.461133 0.437769 WWW 1 TX
2267 SPP 4.132260 0.473945 0.564103 WWW 2 TX
2268 TRE -3.132260 0.526055 0.435897 WWW 2 TX
2269 SPP 5.280268 0.519683 0.550018 WWW 3 TX
2270 TRE -4.280268 0.480317 0.449982 WWW 3 TX
2271 SPP 2.961378 0.697669 0.725501 WWW 4 TX
2272 TRE -1.961378 0.302331 0.274499 WWW 4 TX
2273 SPP -5.065479 0.586754 0.959101 WWW 5 TX
2274 TRE 6.065479 0.413246 0.040899 WWW 5 TX
2275 SPP 3.538606 0.417402 0.656162 WWW 6 TX
2276 TRE -2.538606 0.582598 0.343838 WWW 6 TX
2277 SPP 5.823124 0.481409 0.577494 WWW 7 TX
2278 TRE -4.823124 0.518591 0.422506 WWW 7 TX
2279 SPP 2.823845 0.542277 0.564253 WWW 8 TX
2280 TRE -1.823845 0.457723 0.435747 WWW 8 TX
2281 SPP 4.395278 0.587122 0.573002 WWW 9 TX
2282 TRE -3.395278 0.412878 0.426998 WWW 9 TX
2283 SPP -12.152599 0.618150 0.499494 WWW 10 TX
2284 TRE 13.152599 0.381850 0.500506 WWW 10 TX
2285 SPP 4.745620 0.507283 0.534694 WWW 11 TX
2286 TRE -3.745620 0.492717 0.465306 WWW 11 TX
2287 SPP 3.483764 0.473996 0.558542 WWW 12 TX
2288 TRE -2.483764 0.526004 0.441458 WWW 12 TX

In [232]:
df.loc[(df.state == 'TX') &
       (df['fuel category'] == 'WWW') &
       (df['Reporting Frequency'] == 'A')].groupby(['NERC', 'month', 'fuel category']).sum()


Out[232]:
year plant id generation (MWh) total fuel (mmbtu) elec fuel (mmbtu)
NERC month fuel category
SPP 1 WWW 6045 158335 24477.085 1031862.0 270598.0
2 WWW 6045 158335 23591.727 1002454.0 260114.0
3 WWW 6045 158335 23338.641 1055755.0 258287.0
4 WWW 6045 158335 23981.906 1079494.0 265683.0
5 WWW 6045 158335 22927.767 1020067.0 253919.0
6 WWW 6045 158335 19081.413 824972.0 210964.0
7 WWW 6045 158335 22039.664 994750.0 243501.0
8 WWW 6045 158335 22143.524 1014658.0 243921.0
9 WWW 6045 158335 22219.108 1071510.0 245327.0
10 WWW 6045 158335 15374.204 712726.0 169390.0
11 WWW 6045 158335 22856.243 1012581.0 253042.0
12 WWW 6045 158335 21888.202 964300.0 241652.0
TRE 1 WWW 6045 75188 -19018.764 883010.0 210695.0
2 WWW 6045 75188 -17882.569 1112673.0 200997.0
3 WWW 6045 75188 -18918.668 975783.0 211310.0
4 WWW 6045 75188 -15883.681 467792.0 100523.0
5 WWW 6045 75188 -27454.045 718424.0 10828.0
6 WWW 6045 75188 -13689.060 1151472.0 110548.0
7 WWW 6045 75188 -18254.812 1071581.0 178150.0
8 WWW 6045 75188 -14301.901 856449.0 188369.0
9 WWW 6045 75188 -17163.885 753512.0 182816.0
10 WWW 6045 75188 -16639.300 440272.0 169733.0
11 WWW 6045 75188 -18039.961 983507.0 220204.0
12 WWW 6045 75188 -15605.285 1070104.0 190996.0

In [230]:
df.loc[(df.state == 'TX') &
       (df['fuel category'] == 'WWW') &
       (df['Reporting Frequency'] == 'A')].groupby(['NERC', 'fuel category']).sum()


Out[230]:
year month plant id generation (MWh) total fuel (mmbtu) elec fuel (mmbtu)
NERC fuel category
SPP WWW 72540 234 1900020 263919.484 11785129.0 2916398.0
TRE WWW 72540 234 902256 -212851.931 10484579.0 1975169.0

States that include more than one NERC region


In [79]:
NERC_states = ['WY', 'SD', 'NE', 'OK', 'TX', 'NM', 'LA', 'AR',
               'MO', 'MN', 'IL', 'KY', 'VA', 'FL']

In [93]:
error_list = []
for state in NERC_states:
    error = (annual_state.loc[2016, state]
             - annual_facility.loc[2016, state]) / annual_state.loc[2016, state]
    error['state'] = state
    
    for col in ['generation (MWh)']:#, 'elec fuel (mmbtu)']:
        if error.loc[error[col] > 0.05, col].any():
            error_list.append(error.loc[error[col] > 0.05])

The dataframe below shows all states with more than one NERC region where facility generation is at least 5% below EIA's state-level estimate in 2016.


In [94]:
pd.concat(error_list)


Out[94]:
generation (MWh) elec fuel (mmbtu) state
COW 0.057014 0.058988 WY
HYC 0.095835 NaN WY
NG 0.413092 0.508736 WY
NG 0.212550 0.233675 SD
HYC 1.000000 NaN NE
NG 0.141895 0.122942 NE
WAS 1.000000 NaN NE
HYC 0.327894 NaN OK
OTH 0.359396 NaN OK
PEL 0.098354 0.089782 OK
WAS 1.000000 NaN OK
WWW 1.000000 NaN OK
HYC 0.688630 NaN TX
NG 0.053121 0.062912 TX
OOG 0.132470 NaN TX
OTH 0.533980 NaN TX
SUN 0.247843 NaN TX
WAS 1.000000 NaN TX
WWW 0.443587 NaN TX
GEO 1.000000 NaN NM
NG 0.090467 0.085517 NM
SUN 0.638877 NaN NM
WAS 1.000000 NaN NM
NG 0.055065 0.050793 LA
OOG 0.100225 NaN LA
OTH 0.580154 NaN LA
WAS 0.930467 NaN LA
WWW 0.516165 NaN LA
HYC 0.202653 NaN AR
WAS 1.000000 NaN AR
WWW 0.086687 NaN AR
NG 0.089319 0.123133 MO
SUN 0.860146 NaN MO
WAS 1.000000 NaN MO
HYC 0.942665 NaN MN
NG 0.064268 0.084587 MN
OTH 0.555342 NaN MN
SUN 0.794821 NaN MN
WAS 0.711827 NaN MN
WND 0.112260 NaN MN
WWW 0.596931 NaN MN
NG 0.060489 0.059321 IL
OOG 0.961357 NaN IL
OTH 0.987482 NaN IL
PEL 0.052267 0.059331 IL
SUN 1.000000 NaN IL
WAS 1.000000 NaN IL
HYC 0.128468 NaN KY
WAS 1.000000 NaN KY
HYC 1.139389 NaN VA
OTH 0.164274 NaN VA
PEL 0.055163 0.055660 VA
WAS 0.642315 NaN VA
WWW 0.095183 NaN VA
HYC 1.000000 NaN FL
OTH 0.318424 NaN FL
SUN 0.317072 NaN FL
WAS 0.585390 NaN FL
WWW 0.466676 NaN FL